References

Load packages

library(tidyverse)
library(rstan)
library(bayesplot)

Load data

data("sunspot.year", package = "datasets")
sunspot.year
## Time Series:
## Start = 1700 
## End = 1988 
## Frequency = 1 
##   [1]   5.0  11.0  16.0  23.0  36.0  58.0  29.0  20.0  10.0   8.0   3.0   0.0   0.0   2.0  11.0  27.0  47.0  63.0
##  [19]  60.0  39.0  28.0  26.0  22.0  11.0  21.0  40.0  78.0 122.0 103.0  73.0  47.0  35.0  11.0   5.0  16.0  34.0
##  [37]  70.0  81.0 111.0 101.0  73.0  40.0  20.0  16.0   5.0  11.0  22.0  40.0  60.0  80.9  83.4  47.7  47.8  30.7
##  [55]  12.2   9.6  10.2  32.4  47.6  54.0  62.9  85.9  61.2  45.1  36.4  20.9  11.4  37.8  69.8 106.1 100.8  81.6
##  [73]  66.5  34.8  30.6   7.0  19.8  92.5 154.4 125.9  84.8  68.1  38.5  22.8  10.2  24.1  82.9 132.0 130.9 118.1
##  [91]  89.9  66.6  60.0  46.9  41.0  21.3  16.0   6.4   4.1   6.8  14.5  34.0  45.0  43.1  47.5  42.2  28.1  10.1
## [109]   8.1   2.5   0.0   1.4   5.0  12.2  13.9  35.4  45.8  41.1  30.1  23.9  15.6   6.6   4.0   1.8   8.5  16.6
## [127]  36.3  49.6  64.2  67.0  70.9  47.8  27.5   8.5  13.2  56.9 121.5 138.3 103.2  85.7  64.6  36.7  24.2  10.7
## [145]  15.0  40.1  61.5  98.5 124.7  96.3  66.6  64.5  54.1  39.0  20.6   6.7   4.3  22.7  54.8  93.8  95.8  77.2
## [163]  59.1  44.0  47.0  30.5  16.3   7.3  37.6  74.0 139.0 111.2 101.6  66.2  44.7  17.0  11.3  12.4   3.4   6.0
## [181]  32.3  54.3  59.7  63.7  63.5  52.2  25.4  13.1   6.8   6.3   7.1  35.6  73.0  85.1  78.0  64.0  41.8  26.2
## [199]  26.7  12.1   9.5   2.7   5.0  24.4  42.0  63.5  53.8  62.0  48.5  43.9  18.6   5.7   3.6   1.4   9.6  47.4
## [217]  57.1 103.9  80.6  63.6  37.6  26.1  14.2   5.8  16.7  44.3  63.9  69.0  77.8  64.9  35.7  21.2  11.1   5.7
## [235]   8.7  36.1  79.7 114.4 109.6  88.8  67.8  47.5  30.6  16.3   9.6  33.2  92.6 151.6 136.3 134.7  83.9  69.4
## [253]  31.5  13.9   4.4  38.0 141.7 190.2 184.8 159.0 112.3  53.9  37.5  27.9  10.2  15.1  47.0  93.8 105.9 105.5
## [271] 104.5  66.6  68.9  38.0  34.5  15.5  12.6  27.5  92.5 155.4 154.7 140.5 115.9  66.6  45.9  17.9  13.4  29.2
## [289] 100.2
sunspot_year <- tibble(y = as.numeric(sunspot.year),
                       year = as.integer(time(sunspot.year)))
sunspot_year
## # A tibble: 289 x 2
##        y  year
##    <dbl> <int>
##  1     5  1700
##  2    11  1701
##  3    16  1702
##  4    23  1703
##  5    36  1704
##  6    58  1705
##  7    29  1706
##  8    20  1707
##  9    10  1708
## 10     8  1709
## # … with 279 more rows
ggplot(data = sunspot_year, mapping = aes(x = year, y = y)) +
    geom_point() +
    geom_path() +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())

AR(1) Model

Stan code

ar_p_stan <- rstan::stan_model("./bugs_time_series_ar_p.stan")
ar_p_stan
## S4 class stanmodel 'bugs_time_series_ar_p' coded as follows:
## data {
##     // Hyperparameters
##     // AR(p)
##     int<lower=0> p;
##     real<lower=0> epsilon_sd[p];
##     real theta0_mean;
##     real<lower=0> theta0_sd;
##     real theta_mean[p];
##     real<lower=0> theta_sd[p];
##     real<lower=0> sigma_mean;
##     real<lower=0> sigma_sd;
##     // Data
##     int<lower=0> N;
##     real y[N];
##     int yr[N];
##     // Forecasting
##     int<lower=0> K;
## }
## 
## transformed data {
## }
## 
## parameters {
##     real theta0;
##     real theta[p];
##     real<lower=0> sigma;
##     real epsilon[p];
## }
## 
## transformed parameters {
##     // Mean function
##     real m[N];
##     // Define the first p elements through errors.
##     // Then define priors for epsilon. Data do not inform these.
##     for (t in 1:p) {
##         m[t] = y[t] - epsilon[t];
##     }
##     // Define the rest through AR(p)
##     for (t in (p + 1):N) {
##         m[t] = theta0;
##         for (i in 1:p) {
##             m[t] += theta[i] * y[t - i];
##         }
##     }
## }
## 
## model {
##     // Priors
##     // theta0
##     target += normal_lpdf(theta0 | theta0_mean, theta0_sd);
##     // theta array
##     for (i in 1:p) {
##         target += normal_lpdf(theta[i] | theta_mean[i], theta_sd[i]);
##     }
##     // sigma
##     target += normal_lpdf(sigma | sigma_mean, sigma_sd);
##     // The first p error terms need appropriate prior distributions.
##     for (t in 1:p) {
##         target += normal_lpdf(epsilon[t] | 0, epsilon_sd[t]);
##     }
## 
##     // Likelihood of AR(p) process
##     // The first p y values do not contribute.
##     for (t in (p + 1):N) {
##         target += normal_lpdf(y[t] | m[t], sigma);
##     }
## }
## 
## generated quantities {
##     // Use all N observed y's for these.
##     real m_rep[N + K];
##     real y_rep[N + K];
##     // Use only first p observed y's for these.
##     real m_new[N + K];
##     real y_new[N + K];
##     // The first p y's are not modeled, so just set to what they are.
##     for (t in 1:p) {
##         m_rep[t] = y[t];
##         y_rep[t] = y[t];
##         m_new[t] = y[t];
##         y_new[t] = y[t];
##     }
##     // AR(p) prediction based on observed y's and
##     for (t in (p + 1):(N + K)) {
##         m_rep[t] = theta0;
##         m_new[t] = theta0;
##         for (i in 1:p) {
##             if (t - i <= N) {
##                 // Calculate m_rep using observed y's as long as they are available.
##                 m_rep[t] += theta[i] * y[t - i];
##             } else {
##                 // Once it's out of bound used the generated values.
##                 m_rep[t] += theta[i] * y_rep[t - i];
##             }
##             // Calculate m_new using generated y's except for the first p.
##             m_new[t] += theta[i] * y_new[t - i];
##         }
##         // Randomly generate y_rep[t] based on the calculated m_rep[t].
##         y_rep[t] = normal_rng(m_rep[t], sigma);
##         // Randomly generate y_new[t] based on the calculated m_new[t].
##         y_new[t] = normal_rng(m_new[t], sigma);
##     }
## }

Stan fit

K <- 50
ar_1_stan_fit <-
    rstan::sampling(ar_p_stan,
                    data = list(p = 1,
                                epsilon_sd = as.array(c(100)),
                                theta0_mean = 0,
                                theta0_sd = 100,
                                theta_mean = as.array(c(0)),
                                theta_sd = as.array(c(100)),
                                sigma_mean = 0,
                                sigma_sd = 100,
                                ##
                                N = length(sunspot.year),
                                y = as.numeric(sunspot.year),
                                yr = as.integer(time(sunspot.year)),
                                K = K))

Diagnostics

Diagnostics indicate the HMC sampler behaved nicely.

relevant_pars <- c("theta0","theta","sigma","lp__")
## Print a summary for a fitted model represented by a 'stanfit' object
print(ar_1_stan_fit, pars = relevant_pars)
## Inference for Stan model: bugs_time_series_ar_p.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##              mean se_mean   sd     2.5%      25%      50%      75%    97.5% n_eff Rhat
## theta0       9.06    0.05 2.12     5.05     7.62     9.05    10.53    13.13  2207    1
## theta[1]     0.82    0.00 0.03     0.75     0.80     0.82     0.84     0.89  2245    1
## sigma       22.79    0.02 0.97    21.03    22.13    22.73    23.42    24.77  3613    1
## lp__     -1328.17    0.04 1.42 -1331.87 -1328.91 -1327.85 -1327.14 -1326.41  1557    1
## 
## Samples were drawn using NUTS(diag_e) at Tue Jun 25 05:56:18 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
## Check HMC diagnostics after sampling
rstan::check_hmc_diagnostics(ar_1_stan_fit)
## 
## Divergences:
## 
## Tree depth:
## 
## Energy:
## Create a matrix of output plots from a 'stanfit' object
pairs(ar_1_stan_fit, pars = relevant_pars)

## Explicity specify HMC diagnostics
bayesplot::mcmc_scatter(as.array(ar_1_stan_fit),
                        pars = c("theta[1]", "sigma"),
                        transform = list("sigma" = log),
                        np = nuts_params(ar_1_stan_fit))

## Markov chain traceplots
rstan::traceplot(ar_1_stan_fit, pars = relevant_pars, inc_warmup = FALSE)

## Trace plots of MCMC draws
## ‘mcmc_rank_hist()’ Whereas traditional trace plots visualize how
##      the chains mix over the course of sampling, rank histograms
##      visualize how the values from the chains mix together in
##      terms of ranking. An ideal plot would show the rankings
##      mixing or overlapping in a uniform distribution. See Vehtari
##      et al. (2019) for details.
bayesplot::mcmc_rank_hist(ar_1_stan_fit, regex_pars = c("theta", "sigma"), ref_line = TRUE)

bayesplot::mcmc_rank_overlay(ar_1_stan_fit, regex_pars = c("theta", "sigma"), ref_line = TRUE)

bayesplot::mcmc_trace_highlight(ar_1_stan_fit, regex_pars = c("theta", "sigma"), highlight = 1)

### Posterior predictive checks Posterior predictive checks indicate there are missed features.

y_rep <- as.matrix(ar_1_stan_fit, pars = "y_rep")[,seq_along(sunspot.year)]
## Density overlay
ppc_dens_overlay(y = as.numeric(sunspot.year),
                 yrep = y_rep[sample(seq_len(nrow(y_rep)), size = 200), ])

## Interval
ppc_intervals(y = as.numeric(sunspot.year),
              yrep = y_rep[sample(seq_len(nrow(y_rep)), size = 200), ],
              x = as.integer(time(sunspot.year)),
              prob = 0.5)

## Quantiles
ppc_stat(y = as.numeric(sunspot.year),
         yrep = y_rep,
         stat = function(y) {quantile(y, probs = 0.25)}) +
    labs(title = "25th Percentile")

ppc_stat(y = as.numeric(sunspot.year),
         yrep = y_rep,
         stat = function(y) {quantile(y, probs = 0.75)}) +
    labs(title = "75th Percentile")

If we condition on the observed y’s (y_rep), we get excellent prediction. If we condition on the initial value only (y_new), the prediction goes to a stationary state. Once it is over the range of y, the y_rep prediction also goes to the same stationary value.

plot_data <-
    bind_rows(sunspot_year %>%
              rename(value = y) %>%
              mutate(type = "y"),
              ##
              as.data.frame(ar_1_stan_fit, pars = "y_rep") %>%
              as_tibble() %>%
              `names<-`(as.character(seq(min(time(sunspot.year)),
                                         length.out = length(sunspot.year) + K))) %>%
              gather(key = year, value = value) %>%
              mutate(year = as.integer(year),
                     type = "y_rep"),
              ##
              as.data.frame(ar_1_stan_fit, pars = "y_new") %>%
              as_tibble() %>%
              `names<-`(as.character(seq(min(time(sunspot.year)),
                                         length.out = length(sunspot.year) + K))) %>%
              gather(key = year, value = value) %>%
              mutate(year = as.integer(year),
                     type = "y_new")) %>%
    group_by(type, year) %>%
    summarize(mean = mean(value),
              `25` = quantile(value, probs = 0.25),
              `75` = quantile(value, probs = 0.75))
## Overlay
ggplot(data = plot_data, mapping = aes(x = year, y = mean, group = type, color = type)) +
    geom_line() +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())

## Separate
ggplot(data = plot_data,
       mapping = aes(x = year, y = mean, group = type, color = type)) +
    geom_line() +
    geom_ribbon(data = plot_data %>%
                    filter(type != "y"),
                mapping = aes(ymin = `25`,
                              ymax = `75`),
                alpha = 0.5,
                color = NA) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())

AR(2) model

Stan fit

ar_2_stan_fit <-
    rstan::sampling(ar_p_stan,
                    data = list(p = 2,
                                epsilon_sd = c(100,100),
                                theta0_mean = 0,
                                theta0_sd = 100,
                                theta_mean = c(0,0),
                                theta_sd = c(100,100),
                                sigma_mean = 0,
                                sigma_sd = 100,
                                ##
                                N = length(sunspot.year),
                                y = as.numeric(sunspot.year),
                                yr = as.integer(time(sunspot.year)),
                                K = K))

Diagnostics

Diagnostics indicate the HMC sampler behaved nicely. However, we observe that two theta parameters are correlated, which may be improved by reparametrization.

## Print a summary for a fitted model represented by a 'stanfit' object
print(ar_2_stan_fit, pars = relevant_pars)
## Inference for Stan model: bugs_time_series_ar_p.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##              mean se_mean   sd     2.5%      25%      50%      75%    97.5% n_eff Rhat
## theta0      15.00    0.03 1.57    11.94    13.95    15.00    16.07    18.05  3675    1
## theta[1]     1.39    0.00 0.04     1.30     1.36     1.39     1.42     1.48  2682    1
## theta[2]    -0.69    0.00 0.04    -0.78    -0.72    -0.69    -0.66    -0.61  2656    1
## sigma       16.72    0.01 0.72    15.40    16.23    16.71    17.18    18.23  4431    1
## lp__     -1246.27    0.04 1.71 -1250.33 -1247.19 -1245.94 -1245.01 -1243.91  1882    1
## 
## Samples were drawn using NUTS(diag_e) at Tue Jun 25 05:56:44 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
## Check HMC diagnostics after sampling
rstan::check_hmc_diagnostics(ar_2_stan_fit)
## 
## Divergences:
## 
## Tree depth:
## 
## Energy:
## Create a matrix of output plots from a 'stanfit' object
pairs(ar_2_stan_fit, pars = relevant_pars)

## Explicity specify HMC diagnostics
bayesplot::mcmc_scatter(as.array(ar_2_stan_fit),
                        pars = c("theta[1]", "sigma"),
                        transform = list("sigma" = log),
                        np = nuts_params(ar_2_stan_fit))

## Markov chain traceplots
rstan::traceplot(ar_2_stan_fit, pars = relevant_pars, inc_warmup = FALSE)

## Trace plots of MCMC draws
## ‘mcmc_rank_hist()’ Whereas traditional trace plots visualize how
##      the chains mix over the course of sampling, rank histograms
##      visualize how the values from the chains mix together in
##      terms of ranking. An ideal plot would show the rankings
##      mixing or overlapping in a uniform distribution. See Vehtari
##      et al. (2019) for details.
bayesplot::mcmc_rank_hist(ar_2_stan_fit, regex_pars = c("theta", "sigma"), ref_line = TRUE)

bayesplot::mcmc_rank_overlay(ar_2_stan_fit, regex_pars = c("theta", "sigma"), ref_line = TRUE)

bayesplot::mcmc_trace_highlight(ar_2_stan_fit, regex_pars = c("theta", "sigma"), highlight = 1)

### Posterior predictive checks Posterior predictive checks indicate there are missed features.

y_rep <- as.matrix(ar_2_stan_fit, pars = "y_rep")[,seq_along(sunspot.year)]
## Density overlay
ppc_dens_overlay(y = as.numeric(sunspot.year),
                 yrep = y_rep[sample(seq_len(nrow(y_rep)), size = 200), ])

## Interval
ppc_intervals(y = as.numeric(sunspot.year),
              yrep = y_rep[sample(seq_len(nrow(y_rep)), size = 200), ],
              x = as.integer(time(sunspot.year)),
              prob = 0.5)

## Quantiles
ppc_stat(y = as.numeric(sunspot.year),
         yrep = y_rep,
         stat = function(y) {quantile(y, probs = 0.25)}) +
    labs(title = "25th Percentile")

ppc_stat(y = as.numeric(sunspot.year),
         yrep = y_rep,
         stat = function(y) {quantile(y, probs = 0.75)}) +
    labs(title = "75th Percentile")

If we condition on the observed y’s (y_rep), we get excellent prediction. If we condition on the initial value only (y_new), the prediction goes to a stationary state. Once it is over the range of y, the y_rep prediction also goes to the same stationary value.

plot_data <-
    bind_rows(sunspot_year %>%
              rename(value = y) %>%
              mutate(type = "y"),
              ##
              as.data.frame(ar_2_stan_fit, pars = "y_rep") %>%
              as_tibble() %>%
              `names<-`(as.character(seq(min(time(sunspot.year)),
                                         length.out = length(sunspot.year) + K))) %>%
              gather(key = year, value = value) %>%
              mutate(year = as.integer(year),
                     type = "y_rep"),
              ##
              as.data.frame(ar_2_stan_fit, pars = "y_new") %>%
              as_tibble() %>%
              `names<-`(as.character(seq(min(time(sunspot.year)),
                                         length.out = length(sunspot.year) + K))) %>%
              gather(key = year, value = value) %>%
              mutate(year = as.integer(year),
                     type = "y_new")) %>%
    group_by(type, year) %>%
    summarize(mean = mean(value),
              `25` = quantile(value, probs = 0.25),
              `75` = quantile(value, probs = 0.75))
## Overlay
ggplot(data = plot_data, mapping = aes(x = year, y = mean, group = type, color = type)) +
    geom_line() +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())

## Separate
ggplot(data = plot_data,
       mapping = aes(x = year, y = mean, group = type, color = type)) +
    geom_line() +
    geom_ribbon(data = plot_data %>%
                    filter(type != "y"),
                mapping = aes(ymin = `25`,
                              ymax = `75`),
                alpha = 0.5,
                color = NA) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())

AR(5) model

Stan fit

ar_5_stan_fit <-
    rstan::sampling(ar_p_stan,
                    data = list(p = 5,
                                epsilon_sd = rep(100,5),
                                theta0_mean = 0,
                                theta0_sd = 100,
                                theta_mean = rep(0,5),
                                theta_sd = rep(100,5),
                                sigma_mean = 0,
                                sigma_sd = 100,
                                ##
                                N = length(sunspot.year),
                                y = as.numeric(sunspot.year),
                                yr = as.integer(time(sunspot.year)),
                                K = K))

Diagnostics

Diagnostics indicate the HMC sampler behaved nicely. However, we observe that two theta parameters are correlated, which may be improved by reparametrization.

## Print a summary for a fitted model represented by a 'stanfit' object
print(ar_5_stan_fit, pars = relevant_pars)
## Inference for Stan model: bugs_time_series_ar_p.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##              mean se_mean   sd     2.5%      25%      50%      75%    97.5% n_eff Rhat
## theta0      15.99    0.04 2.31    11.45    14.47    15.98    17.49    20.57  4037    1
## theta[1]     1.32    0.00 0.06     1.20     1.28     1.32     1.36     1.44  3318    1
## theta[2]    -0.51    0.00 0.10    -0.72    -0.58    -0.51    -0.44    -0.31  2955    1
## theta[3]    -0.20    0.00 0.11    -0.41    -0.27    -0.20    -0.13     0.01  2702    1
## theta[4]     0.08    0.00 0.10    -0.11     0.02     0.08     0.15     0.29  2440    1
## theta[5]    -0.02    0.00 0.06    -0.14    -0.06    -0.02     0.03     0.10  2776    1
## sigma       16.75    0.01 0.74    15.39    16.24    16.72    17.22    18.26  3371    1
## lp__     -1268.65    0.06 2.46 -1274.36 -1270.06 -1268.33 -1266.85 -1264.76  1794    1
## 
## Samples were drawn using NUTS(diag_e) at Tue Jun 25 05:57:19 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
## Check HMC diagnostics after sampling
rstan::check_hmc_diagnostics(ar_5_stan_fit)
## 
## Divergences:
## 
## Tree depth:
## 
## Energy:
## Create a matrix of output plots from a 'stanfit' object
pairs(ar_5_stan_fit, pars = relevant_pars)

## Explicity specify HMC diagnostics
bayesplot::mcmc_scatter(as.array(ar_5_stan_fit),
                        pars = c("theta[1]", "sigma"),
                        transform = list("sigma" = log),
                        np = nuts_params(ar_5_stan_fit))

## Markov chain traceplots
rstan::traceplot(ar_5_stan_fit, pars = relevant_pars, inc_warmup = FALSE)

## Trace plots of MCMC draws
## ‘mcmc_rank_hist()’ Whereas traditional trace plots visualize how
##      the chains mix over the course of sampling, rank histograms
##      visualize how the values from the chains mix together in
##      terms of ranking. An ideal plot would show the rankings
##      mixing or overlapping in a uniform distribution. See Vehtari
##      et al. (2019) for details.
bayesplot::mcmc_rank_hist(ar_5_stan_fit, regex_pars = c("theta", "sigma"), ref_line = TRUE)

bayesplot::mcmc_rank_overlay(ar_5_stan_fit, regex_pars = c("theta", "sigma"), ref_line = TRUE)

bayesplot::mcmc_trace_highlight(ar_5_stan_fit, regex_pars = c("theta", "sigma"), highlight = 1)

### Posterior predictive checks Posterior predictive checks indicate there are missed features.

y_rep <- as.matrix(ar_5_stan_fit, pars = "y_rep")[,seq_along(sunspot.year)]
## Density overlay
ppc_dens_overlay(y = as.numeric(sunspot.year),
                 yrep = y_rep[sample(seq_len(nrow(y_rep)), size = 200), ])

## Interval
ppc_intervals(y = as.numeric(sunspot.year),
              yrep = y_rep[sample(seq_len(nrow(y_rep)), size = 200), ],
              x = as.integer(time(sunspot.year)),
              prob = 0.5)

## Quantiles
ppc_stat(y = as.numeric(sunspot.year),
         yrep = y_rep,
         stat = function(y) {quantile(y, probs = 0.25)}) +
    labs(title = "25th Percentile")

ppc_stat(y = as.numeric(sunspot.year),
         yrep = y_rep,
         stat = function(y) {quantile(y, probs = 0.75)}) +
    labs(title = "75th Percentile")

If we condition on the observed y’s (y_rep), we get excellent prediction. If we condition on the initial value only (y_new), the prediction goes to a stationary state. Once it is over the range of y, the y_rep prediction also goes to the same stationary value.

plot_data <-
    bind_rows(sunspot_year %>%
              rename(value = y) %>%
              mutate(type = "y"),
              ##
              as.data.frame(ar_5_stan_fit, pars = "y_rep") %>%
              as_tibble() %>%
              `names<-`(as.character(seq(min(time(sunspot.year)),
                                         length.out = length(sunspot.year) + K))) %>%
              gather(key = year, value = value) %>%
              mutate(year = as.integer(year),
                     type = "y_rep"),
              ##
              as.data.frame(ar_5_stan_fit, pars = "y_new") %>%
              as_tibble() %>%
              `names<-`(as.character(seq(min(time(sunspot.year)),
                                         length.out = length(sunspot.year) + K))) %>%
              gather(key = year, value = value) %>%
              mutate(year = as.integer(year),
                     type = "y_new")) %>%
    group_by(type, year) %>%
    summarize(mean = mean(value),
              `25` = quantile(value, probs = 0.25),
              `75` = quantile(value, probs = 0.75))
## Overlay
ggplot(data = plot_data, mapping = aes(x = year, y = mean, group = type, color = type)) +
    geom_line() +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())

## Separate
ggplot(data = plot_data,
       mapping = aes(x = year, y = mean, group = type, color = type)) +
    geom_line() +
    geom_ribbon(data = plot_data %>%
                    filter(type != "y"),
                mapping = aes(ymin = `25`,
                              ymax = `75`),
                alpha = 0.5,
                color = NA) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
          legend.key = element_blank(),
          plot.title = element_text(hjust = 0.5),
          strip.background = element_blank())


print(sessionInfo())
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.5
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] bayesplot_1.7.0    rstan_2.18.2       StanHeaders_2.18.1 forcats_0.4.0      stringr_1.4.0     
##  [6] dplyr_0.8.1        purrr_0.3.2        readr_1.3.1        tidyr_0.8.3        tibble_2.1.3      
## [11] ggplot2_3.1.1      tidyverse_1.2.1    doRNG_1.7.1        rngtools_1.3.1.1   pkgmaker_0.27     
## [16] registry_0.5-1     doParallel_1.0.14  iterators_1.0.10   foreach_1.4.4      knitr_1.23        
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.0         jsonlite_1.6       modelr_0.1.4       assertthat_0.2.1   stats4_3.6.0      
##  [6] cellranger_1.1.0   yaml_2.2.0         pillar_1.4.1       backports_1.1.4    lattice_0.20-38   
## [11] glue_1.3.1         digest_0.6.19      rvest_0.3.4        colorspace_1.4-1   htmltools_0.3.6   
## [16] plyr_1.8.4         pkgconfig_2.0.2    bibtex_0.4.2       broom_0.5.2        haven_2.1.0       
## [21] xtable_1.8-4       scales_1.0.0       processx_3.3.1     generics_0.0.2     withr_2.1.2       
## [26] lazyeval_0.2.2     cli_1.1.0          magrittr_1.5       crayon_1.3.4       readxl_1.3.1      
## [31] evaluate_0.14      ps_1.3.0           fansi_0.4.0        nlme_3.1-140       xml2_1.2.0        
## [36] pkgbuild_1.0.3     tools_3.6.0        loo_2.1.0          prettyunits_1.0.2  hms_0.4.2         
## [41] matrixStats_0.54.0 munsell_0.5.0      callr_3.2.0        compiler_3.6.0     rlang_0.3.4       
## [46] grid_3.6.0         ggridges_0.5.1     rstudioapi_0.10    labeling_0.3       rmarkdown_1.13    
## [51] gtable_0.3.0       codetools_0.2-16   inline_0.3.15      reshape2_1.4.3     R6_2.4.0          
## [56] gridExtra_2.3      lubridate_1.7.4    zeallot_0.1.0      utf8_1.1.4         KernSmooth_2.23-15
## [61] stringi_1.4.3      Rcpp_1.0.1         vctrs_0.1.0        tidyselect_0.2.5   xfun_0.7
## Record execution time and multicore use
end_time <- Sys.time()
diff_time <- difftime(end_time, start_time, units = "auto")
cat("Started  ", as.character(start_time), "\n",
    "Finished ", as.character(end_time), "\n",
    "Time difference of ", diff_time, " ", attr(diff_time, "units"), "\n",
    "Used ", foreach::getDoParWorkers(), " cores\n",
    "Used ", foreach::getDoParName(), " as backend\n",
    sep = "")
## Started  2019-06-25 05:55:39
## Finished 2019-06-25 05:57:56
## Time difference of 2.282096 mins
## Used 12 cores
## Used doParallelMC as backend